## Warning: package 'BiocGenerics' was built under R version 4.0.5
## Warning: package 'GenomeInfoDb' was built under R version 4.0.5
load("~/Documents/MiGASti/Databases/gene_matrix.RData")
metadata <- read.table("~/Documents/MiGASti/Databases/metadata2.txt")
#set rownames to Sample
row.names(metadata) <- metadata$Sample
setwd("~/Documents/MiGASti/Databases")
#exclude samples that did not pass QC filtering
exclude <- read.table("samples2remove2.txt")
exclude <- exclude$x
genes_counts_filt = genes_counts[, !colnames(genes_counts) %in% exclude]
#Excludes the samples from filters.
#dim(genes_counts_filt)
metadata_filt = metadata[ !(rownames(metadata) %in% exclude), ]
length(metadata_filt)
gencode_30 = read.table("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
#order metadata and genes counts
genes_counts_ordered <- genes_counts_filt[,rownames(metadata_filt)]
#head(genes_counts_ordered)
all(rownames(metadata_filt) == colnames (genes_counts_ordered)) #TRUE#remove uncultured samples
metadata_cultured <- metadata_filt[metadata_filt$Stimulation != "ununstim",]
#remove uncultured samples
metadata_cultured_subset <- metadata_cultured[metadata_cultured$Stimulation != "TNFa",]
#check numbers
#dim(metadata_cultured)
#check numbers per stimulation
#table(metadata_filt$Stimulation)
#select only GFM samples in metadata
metadata_GFM = metadata_cultured_subset[metadata_cultured_subset$Region=="GFM",]
#select only GFM samples in genes counts
genes_counts_GFM <- genes_counts_ordered[,metadata_GFM$Sample]
#order metadata and genes_counts
genes_counts_GFM_ordered <- genes_counts_GFM[,rownames(metadata_GFM)]
#check ordering
all(rownames(metadata_GFM) == colnames (genes_counts_GFM_ordered)) #TRUE[1] TRUE
#check numbers
#dim(metadata_cultured)
#check numbers per stimulation
#table(metadata_filt$Stimulation)
#select only GFM samples in metadata
metadata_GFM = metadata_cultured[metadata_cultured$Region=="GFM",]
#select only GFM samples in genes counts
genes_counts_GFM <- genes_counts_ordered[,metadata_GFM$Sample]
#order metadata and genes_counts
genes_counts_GFM_ordered <- genes_counts_GFM[,rownames(metadata_GFM)]
#check ordering
all(rownames(metadata_GFM) == colnames (genes_counts_GFM_ordered)) #TRUE[1] TRUE
#round counts; deseq2 can only handle integers
genes_counts_GFM_ordered <- round(genes_counts_GFM_ordered, digits=0)
#make sure covariate variables are the right format
#cannot create dds object with numeric values
metadata_GFM$Donor_id <- as.factor(metadata_GFM$Donor_id)
metadata_GFM$age <- as.integer(metadata_GFM$age)
metadata_GFM$sex <- as.factor(metadata_GFM$sex)
metadata_GFM$Stimulation <- as.factor(metadata_GFM$Stimulation)
metadata_GFM$picard_pct_ribosomal_bases = scale(metadata_GFM$picard_pct_ribosomal_bases)
metadata_GFM$picard_pct_mrna_bases = scale(metadata_GFM$picard_pct_mrna_bases)
metadata_GFM$picard_pct_pf_reads_aligned = scale(metadata_GFM$picard_pct_pf_reads_aligned)
metadata_GFM$picard_pct_percent_duplication = scale(metadata_GFM$picard_pct_percent_duplication)
#adjust for: ~ age + (1|donor_id) + picard_pct_ribosomal_bases + picard_pct_mrna_bases + picard_pct_percent_duplication + picard_pct_pf_reads_aligned #createDeSEQ2 object for LPS
dds <- DESeqDataSetFromMatrix(countData = genes_counts_GFM_ordered,
colData = metadata_GFM,
design = ~ age + sex + picard_pct_ribosomal_bases + picard_pct_mrna_bases + picard_pct_percent_duplication + picard_pct_pf_reads_aligned + Stimulation)
#variable of interest at end of the formula
#Make sure that control group is set as the reference group
dds$Stimulation <- relevel(dds$Stimulation, ref="unstim")
table(dds$Stimulation) unstim ATP DEX IFNy IL4 LPS R848 TNFa 38 3 9 23 7 33 17 23
#head(dds)
#filter: CPM > 1 in 50% of the samples
keep.exp = rowSums(cpm(genes_counts_GFM_ordered) > 1) >= 0.5*ncol(genes_counts_GFM_ordered)
dds = dds[keep.exp,]
#Run differential expression
dds <- DESeq(dds, betaPrior = FALSE)
resultsNames(dds)[1] “Intercept” “age”
[3] “sex_m_vs_f” “picard_pct_ribosomal_bases”
[5] “picard_pct_mrna_bases” “picard_pct_percent_duplication” [7] “picard_pct_pf_reads_aligned” “Stimulation_ATP_vs_unstim”
[9] “Stimulation_DEX_vs_unstim” “Stimulation_IFNy_vs_unstim”
[11] “Stimulation_IL4_vs_unstim” “Stimulation_LPS_vs_unstim”
[13] “Stimulation_R848_vs_unstim” “Stimulation_TNFa_vs_unstim”
# generate results table for LPS vs unstim
res_LPS <- results(dds, name="Stimulation_LPS_vs_unstim")
sum(res_LPS$padj < 0.05, na.rm=TRUE)[1] 376
resOrdered_LPS <- res_LPS[order(res_LPS$pvalue),]
resOrdered_LPS <- as.data.frame(resOrdered_LPS)head(res_LPS)log2 fold change (MLE): Stimulation LPS vs unstim Wald test p-value: Stimulation LPS vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue
with(res_LPS, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-2.5,2)))
with(subset(res_LPS, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))#The function plotMA shows the log2 fold changes attributable to a given variable over the mean of normalized counts for all the samples in the DESeqDataSet. Points will be colored if the adjusted p value is less than 0.1. Points which fall out of the window are plotted as open triangles pointing either up or down.
plotMA(res_LPS, ylim=c(-2,2))setDT(resOrdered_LPS, keep.rownames = "ensembl")
resOrdered_LPS <- left_join(resOrdered_LPS, gencode_30, by = "ensembl")
resOrdered_LPS_top = resOrdered_LPS[order(resOrdered_LPS$padj) ,]
setDT(resOrdered_LPS_top, keep.rownames = "ensembl")
resOrdered_LPS_top = resOrdered_LPS_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_LPS_top)write.table(resOrdered_LPS_top, "DEG_LPS_GFM.txt")# generate results table for IFNy vs unstim
res_IFNy <- results(dds, name="Stimulation_IFNy_vs_unstim")
sum(res_IFNy$padj < 0.05, na.rm=TRUE)[1] 155
resOrdered_IFNy <- res_IFNy[order(res_IFNy$pvalue),]
resOrdered_IFNy <- as.data.frame(resOrdered_IFNy)head(res_IFNy)log2 fold change (MLE): Stimulation IFNy vs unstim Wald test p-value: Stimulation IFNy vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue
with(res_IFNy, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-10,10)))
with(subset(res_IFNy, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))plotMA(res_IFNy, ylim=c(-2,2))setDT(resOrdered_IFNy, keep.rownames = "ensembl")
resOrdered_IFNy <- left_join(resOrdered_IFNy, gencode_30, by = "ensembl")
resOrdered_IFNy_top = resOrdered_IFNy[order(resOrdered_IFNy$padj) ,]
resOrdered_IFNy_top = head(resOrdered_IFNy)
setDT(resOrdered_IFNy_top, keep.rownames = "ensembl")
resOrdered_IFNy_top = resOrdered_IFNy_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_IFNy_top)write.table(resOrdered_IFNy_top, "DEG_IFNy_GFM.txt")# generate results table for R848 vs unstim
res_R848 <- results(dds, name="Stimulation_R848_vs_unstim")
sum(res_R848$padj < 0.05, na.rm=TRUE)[1] 31
resOrdered_R848 <- res_R848[order(res_R848$pvalue),]
resOrdered_R848 <- as.data.frame(resOrdered_R848)head(res_R848)log2 fold change (MLE): Stimulation R848 vs unstim Wald test p-value: Stimulation R848 vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue
with(res_R848, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-10,10)))
with(subset(res_R848, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))plotMA(res_R848, ylim=c(-2,2))setDT(resOrdered_R848, keep.rownames = "ensembl")
resOrdered_R848 <- left_join(resOrdered_R848, gencode_30, by = "ensembl")
resOrdered_R848_top = resOrdered_R848[order(resOrdered_R848$padj) ,]
setDT(resOrdered_R848_top, keep.rownames = "ensembl")
resOrdered_R848_top = resOrdered_R848_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_R848_top)# generate results table for DEX vs unstim
res_DEX <- results(dds, name="Stimulation_DEX_vs_unstim")
sum(res_DEX$padj < 0.05, na.rm=TRUE)[1] 622
resOrdered_DEX <- res_DEX[order(res_DEX$pvalue),]
resOrdered_DEX <- as.data.frame(resOrdered_DEX)head(res_DEX)log2 fold change (MLE): Stimulation DEX vs unstim Wald test p-value: Stimulation DEX vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue
with(res_DEX, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-10,10)))
with(subset(res_DEX, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))plotMA(res_DEX, ylim=c(-2,2))setDT(resOrdered_DEX, keep.rownames = "ensembl")
resOrdered_DEX <- left_join(resOrdered_DEX, gencode_30, by = "ensembl")
resOrdered_DEX_top = resOrdered_DEX[order(resOrdered_DEX$padj) ,]
setDT(resOrdered_DEX_top, keep.rownames = "ensembl")
resOrdered_DEX_top = resOrdered_DEX_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_DEX_top)#write.table(resOrdered_DEX_top, "DEG_DEX_GFM.txt")res_IL4 <- results(dds, name="Stimulation_IL4_vs_unstim")
sum(res_IL4$padj < 0.05, na.rm=TRUE)[1] 49
resOrdered_IL4 <- res_IL4[order(res_IL4$pvalue),]
resOrdered_IL4 <- as.data.frame(resOrdered_IL4)head(res_IL4)log2 fold change (MLE): Stimulation IL4 vs unstim Wald test p-value: Stimulation IL4 vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue
with(res_IL4, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-10,10)))
with(subset(res_IL4, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))plotMA(res_IL4, ylim=c(-2,2))setDT(resOrdered_IL4, keep.rownames = "ensembl")
resOrdered_IL4 <- left_join(resOrdered_IL4, gencode_30, by = "ensembl")
resOrdered_IL4_top = resOrdered_IL4[order(resOrdered_IL4$padj) ,]
setDT(resOrdered_IL4_top, keep.rownames = "ensembl")
resOrdered_IL4_top = resOrdered_IL4_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_IL4_top)#write.table(resOrdered_IL4_top, "DEG_IL4_GFM.txt")# generate results table for ATP vs unstim
res_ATP <- results(dds, name="Stimulation_ATP_vs_unstim")
sum(res_ATP$padj < 0.05, na.rm=TRUE)[1] 134
resOrdered_ATP <- res_ATP[order(res_ATP$pvalue),]
resOrdered_ATP <- as.data.frame(resOrdered_ATP)head(res_ATP)log2 fold change (MLE): Stimulation ATP vs unstim Wald test p-value: Stimulation ATP vs unstim DataFrame with 6 rows and 6 columns baseMean log2FoldChange lfcSE stat pvalue
with(res_ATP, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-10,10)))
with(subset(res_ATP, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))plotMA(res_ATP, ylim=c(-2,2))setDT(resOrdered_ATP, keep.rownames = "ensembl")
resOrdered_ATP <- left_join(resOrdered_ATP, gencode_30, by = "ensembl")
resOrdered_ATP_top = resOrdered_ATP[order(resOrdered_ATP$padj) ,]
setDT(resOrdered_ATP_top, keep.rownames = "ensembl")
resOrdered_ATP_top = resOrdered_ATP_top[, c("ensembl", "symbol", "baseMean", "log2FoldChange", "lfcSE", "stat", "pvalue", "padj")]
createDT(resOrdered_ATP_top)write.table(resOrdered_ATP_top, "DEG_ATP_GFM.txt")#Create genelists with Log2FC < 1 and < -1 for different stimuli
resOrdered_LPS_p <- subset(resOrdered_LPS_top, padj < 0.05)
resOrdered_LPS_LFC <- subset(resOrdered_LPS_p, log2FoldChange > 1 | log2FoldChange < -1)
write.table(resOrdered_LPS_LFC, "LPS_GFM_FC1.txt")
resOrdered_IFNy_p <- subset(resOrdered_IFNy_top, padj < 0.05)
resOrdered_IFNy_LFC <- subset(resOrdered_IFNy_p, log2FoldChange > 1 | log2FoldChange < -1)
write.table(resOrdered_IFNy_LFC, "IFNy_GFM_FC1.txt")
resOrdered_R848_p <- subset(resOrdered_R848_top, padj < 0.05)
resOrdered_R848_LFC <- subset(resOrdered_R848_p, log2FoldChange > 1 | log2FoldChange < -1)
write.table(resOrdered_R848_LFC, "R848_GFM_FC1.txt")
resOrdered_DEX_p <- subset(resOrdered_DEX_top, padj < 0.05)
resOrdered_DEX_LFC <- subset(resOrdered_DEX_p, log2FoldChange > 1 | log2FoldChange < -1)
write.table(resOrdered_DEX_LFC, "DEX_GFM_FC1.txt")
resOrdered_IL4_p <- subset(resOrdered_IL4_top, padj < 0.05)
resOrdered_IL4_LFC <- subset(resOrdered_IL4_p, log2FoldChange > 1 | log2FoldChange < -1)
write.table(resOrdered_IL4_LFC, "IL4_GFM_FC1.txt")
resOrdered_ATP_p <- subset(resOrdered_ATP_top, padj < 0.05)
resOrdered_ATP_LFC <- subset(resOrdered_ATP_p, log2FoldChange > 1 | log2FoldChange < -1)
write.table(resOrdered_ATP_LFC, "ATP_GFM_FC1.txt")